In [ ]:
import pandas as pd
In [6]:
#importing dataset
df = pd.read_csv('iris.csv', names=['sepal length', 'sepal width', 'petal length', 'petal width', 'target'])
In [7]:
df.head()
Out[7]:
sepal length sepal width petal length petal width target
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa

Standarizing Dataset

In [8]:
from sklearn.preprocessing import StandardScaler
In [9]:
#seperating data frame into features and target
features = ['sepal length', 'sepal width', 'petal length', 'petal width']
x = df.loc[:, features]
y = df.loc[:, ['target']]
In [12]:
scaled_x = StandardScaler().fit_transform(x)
In [13]:
scaled_x
Out[13]:
array([[-9.00681170e-01,  1.03205722e+00, -1.34127240e+00,
        -1.31297673e+00],
       [-1.14301691e+00, -1.24957601e-01, -1.34127240e+00,
        -1.31297673e+00],
       [-1.38535265e+00,  3.37848329e-01, -1.39813811e+00,
        -1.31297673e+00],
       [-1.50652052e+00,  1.06445364e-01, -1.28440670e+00,
        -1.31297673e+00],
       [-1.02184904e+00,  1.26346019e+00, -1.34127240e+00,
        -1.31297673e+00],
       [-5.37177559e-01,  1.95766909e+00, -1.17067529e+00,
        -1.05003079e+00],
       [-1.50652052e+00,  8.00654259e-01, -1.34127240e+00,
        -1.18150376e+00],
       [-1.02184904e+00,  8.00654259e-01, -1.28440670e+00,
        -1.31297673e+00],
       [-1.74885626e+00, -3.56360566e-01, -1.34127240e+00,
        -1.31297673e+00],
       [-1.14301691e+00,  1.06445364e-01, -1.28440670e+00,
        -1.44444970e+00],
       [-5.37177559e-01,  1.49486315e+00, -1.28440670e+00,
        -1.31297673e+00],
       [-1.26418478e+00,  8.00654259e-01, -1.22754100e+00,
        -1.31297673e+00],
       [-1.26418478e+00, -1.24957601e-01, -1.34127240e+00,
        -1.44444970e+00],
       [-1.87002413e+00, -1.24957601e-01, -1.51186952e+00,
        -1.44444970e+00],
       [-5.25060772e-02,  2.18907205e+00, -1.45500381e+00,
        -1.31297673e+00],
       [-1.73673948e-01,  3.11468391e+00, -1.28440670e+00,
        -1.05003079e+00],
       [-5.37177559e-01,  1.95766909e+00, -1.39813811e+00,
        -1.05003079e+00],
       [-9.00681170e-01,  1.03205722e+00, -1.34127240e+00,
        -1.18150376e+00],
       [-1.73673948e-01,  1.72626612e+00, -1.17067529e+00,
        -1.18150376e+00],
       [-9.00681170e-01,  1.72626612e+00, -1.28440670e+00,
        -1.18150376e+00],
       [-5.37177559e-01,  8.00654259e-01, -1.17067529e+00,
        -1.31297673e+00],
       [-9.00681170e-01,  1.49486315e+00, -1.28440670e+00,
        -1.05003079e+00],
       [-1.50652052e+00,  1.26346019e+00, -1.56873522e+00,
        -1.31297673e+00],
       [-9.00681170e-01,  5.69251294e-01, -1.17067529e+00,
        -9.18557817e-01],
       [-1.26418478e+00,  8.00654259e-01, -1.05694388e+00,
        -1.31297673e+00],
       [-1.02184904e+00, -1.24957601e-01, -1.22754100e+00,
        -1.31297673e+00],
       [-1.02184904e+00,  8.00654259e-01, -1.22754100e+00,
        -1.05003079e+00],
       [-7.79513300e-01,  1.03205722e+00, -1.28440670e+00,
        -1.31297673e+00],
       [-7.79513300e-01,  8.00654259e-01, -1.34127240e+00,
        -1.31297673e+00],
       [-1.38535265e+00,  3.37848329e-01, -1.22754100e+00,
        -1.31297673e+00],
       [-1.26418478e+00,  1.06445364e-01, -1.22754100e+00,
        -1.31297673e+00],
       [-5.37177559e-01,  8.00654259e-01, -1.28440670e+00,
        -1.05003079e+00],
       [-7.79513300e-01,  2.42047502e+00, -1.28440670e+00,
        -1.44444970e+00],
       [-4.16009689e-01,  2.65187798e+00, -1.34127240e+00,
        -1.31297673e+00],
       [-1.14301691e+00,  1.06445364e-01, -1.28440670e+00,
        -1.44444970e+00],
       [-1.02184904e+00,  3.37848329e-01, -1.45500381e+00,
        -1.31297673e+00],
       [-4.16009689e-01,  1.03205722e+00, -1.39813811e+00,
        -1.31297673e+00],
       [-1.14301691e+00,  1.06445364e-01, -1.28440670e+00,
        -1.44444970e+00],
       [-1.74885626e+00, -1.24957601e-01, -1.39813811e+00,
        -1.31297673e+00],
       [-9.00681170e-01,  8.00654259e-01, -1.28440670e+00,
        -1.31297673e+00],
       [-1.02184904e+00,  1.03205722e+00, -1.39813811e+00,
        -1.18150376e+00],
       [-1.62768839e+00, -1.74477836e+00, -1.39813811e+00,
        -1.18150376e+00],
       [-1.74885626e+00,  3.37848329e-01, -1.39813811e+00,
        -1.31297673e+00],
       [-1.02184904e+00,  1.03205722e+00, -1.22754100e+00,
        -7.87084847e-01],
       [-9.00681170e-01,  1.72626612e+00, -1.05694388e+00,
        -1.05003079e+00],
       [-1.26418478e+00, -1.24957601e-01, -1.34127240e+00,
        -1.18150376e+00],
       [-9.00681170e-01,  1.72626612e+00, -1.22754100e+00,
        -1.31297673e+00],
       [-1.50652052e+00,  3.37848329e-01, -1.34127240e+00,
        -1.31297673e+00],
       [-6.58345429e-01,  1.49486315e+00, -1.28440670e+00,
        -1.31297673e+00],
       [-1.02184904e+00,  5.69251294e-01, -1.34127240e+00,
        -1.31297673e+00],
       [ 1.40150837e+00,  3.37848329e-01,  5.35295827e-01,
         2.64698913e-01],
       [ 6.74501145e-01,  3.37848329e-01,  4.21564419e-01,
         3.96171883e-01],
       [ 1.28034050e+00,  1.06445364e-01,  6.49027235e-01,
         3.96171883e-01],
       [-4.16009689e-01, -1.74477836e+00,  1.37235899e-01,
         1.33225943e-01],
       [ 7.95669016e-01, -5.87763531e-01,  4.78430123e-01,
         3.96171883e-01],
       [-1.73673948e-01, -5.87763531e-01,  4.21564419e-01,
         1.33225943e-01],
       [ 5.53333275e-01,  5.69251294e-01,  5.35295827e-01,
         5.27644853e-01],
       [-1.14301691e+00, -1.51337539e+00, -2.60824029e-01,
        -2.61192967e-01],
       [ 9.16836886e-01, -3.56360566e-01,  4.78430123e-01,
         1.33225943e-01],
       [-7.79513300e-01, -8.19166497e-01,  8.03701950e-02,
         2.64698913e-01],
       [-1.02184904e+00, -2.43898725e+00, -1.47092621e-01,
        -2.61192967e-01],
       [ 6.86617933e-02, -1.24957601e-01,  2.50967307e-01,
         3.96171883e-01],
       [ 1.89829664e-01, -1.97618132e+00,  1.37235899e-01,
        -2.61192967e-01],
       [ 3.10997534e-01, -3.56360566e-01,  5.35295827e-01,
         2.64698913e-01],
       [-2.94841818e-01, -3.56360566e-01, -9.02269170e-02,
         1.33225943e-01],
       [ 1.03800476e+00,  1.06445364e-01,  3.64698715e-01,
         2.64698913e-01],
       [-2.94841818e-01, -1.24957601e-01,  4.21564419e-01,
         3.96171883e-01],
       [-5.25060772e-02, -8.19166497e-01,  1.94101603e-01,
        -2.61192967e-01],
       [ 4.32165405e-01, -1.97618132e+00,  4.21564419e-01,
         3.96171883e-01],
       [-2.94841818e-01, -1.28197243e+00,  8.03701950e-02,
        -1.29719997e-01],
       [ 6.86617933e-02,  3.37848329e-01,  5.92161531e-01,
         7.90590793e-01],
       [ 3.10997534e-01, -5.87763531e-01,  1.37235899e-01,
         1.33225943e-01],
       [ 5.53333275e-01, -1.28197243e+00,  6.49027235e-01,
         3.96171883e-01],
       [ 3.10997534e-01, -5.87763531e-01,  5.35295827e-01,
         1.75297293e-03],
       [ 6.74501145e-01, -3.56360566e-01,  3.07833011e-01,
         1.33225943e-01],
       [ 9.16836886e-01, -1.24957601e-01,  3.64698715e-01,
         2.64698913e-01],
       [ 1.15917263e+00, -5.87763531e-01,  5.92161531e-01,
         2.64698913e-01],
       [ 1.03800476e+00, -1.24957601e-01,  7.05892939e-01,
         6.59117823e-01],
       [ 1.89829664e-01, -3.56360566e-01,  4.21564419e-01,
         3.96171883e-01],
       [-1.73673948e-01, -1.05056946e+00, -1.47092621e-01,
        -2.61192967e-01],
       [-4.16009689e-01, -1.51337539e+00,  2.35044910e-02,
        -1.29719997e-01],
       [-4.16009689e-01, -1.51337539e+00, -3.33612130e-02,
        -2.61192967e-01],
       [-5.25060772e-02, -8.19166497e-01,  8.03701950e-02,
         1.75297293e-03],
       [ 1.89829664e-01, -8.19166497e-01,  7.62758643e-01,
         5.27644853e-01],
       [-5.37177559e-01, -1.24957601e-01,  4.21564419e-01,
         3.96171883e-01],
       [ 1.89829664e-01,  8.00654259e-01,  4.21564419e-01,
         5.27644853e-01],
       [ 1.03800476e+00,  1.06445364e-01,  5.35295827e-01,
         3.96171883e-01],
       [ 5.53333275e-01, -1.74477836e+00,  3.64698715e-01,
         1.33225943e-01],
       [-2.94841818e-01, -1.24957601e-01,  1.94101603e-01,
         1.33225943e-01],
       [-4.16009689e-01, -1.28197243e+00,  1.37235899e-01,
         1.33225943e-01],
       [-4.16009689e-01, -1.05056946e+00,  3.64698715e-01,
         1.75297293e-03],
       [ 3.10997534e-01, -1.24957601e-01,  4.78430123e-01,
         2.64698913e-01],
       [-5.25060772e-02, -1.05056946e+00,  1.37235899e-01,
         1.75297293e-03],
       [-1.02184904e+00, -1.74477836e+00, -2.60824029e-01,
        -2.61192967e-01],
       [-2.94841818e-01, -8.19166497e-01,  2.50967307e-01,
         1.33225943e-01],
       [-1.73673948e-01, -1.24957601e-01,  2.50967307e-01,
         1.75297293e-03],
       [-1.73673948e-01, -3.56360566e-01,  2.50967307e-01,
         1.33225943e-01],
       [ 4.32165405e-01, -3.56360566e-01,  3.07833011e-01,
         1.33225943e-01],
       [-9.00681170e-01, -1.28197243e+00, -4.31421141e-01,
        -1.29719997e-01],
       [-1.73673948e-01, -5.87763531e-01,  1.94101603e-01,
         1.33225943e-01],
       [ 5.53333275e-01,  5.69251294e-01,  1.27454998e+00,
         1.71090158e+00],
       [-5.25060772e-02, -8.19166497e-01,  7.62758643e-01,
         9.22063763e-01],
       [ 1.52267624e+00, -1.24957601e-01,  1.21768427e+00,
         1.18500970e+00],
       [ 5.53333275e-01, -3.56360566e-01,  1.04708716e+00,
         7.90590793e-01],
       [ 7.95669016e-01, -1.24957601e-01,  1.16081857e+00,
         1.31648267e+00],
       [ 2.12851559e+00, -1.24957601e-01,  1.61574420e+00,
         1.18500970e+00],
       [-1.14301691e+00, -1.28197243e+00,  4.21564419e-01,
         6.59117823e-01],
       [ 1.76501198e+00, -3.56360566e-01,  1.44514709e+00,
         7.90590793e-01],
       [ 1.03800476e+00, -1.28197243e+00,  1.16081857e+00,
         7.90590793e-01],
       [ 1.64384411e+00,  1.26346019e+00,  1.33141568e+00,
         1.71090158e+00],
       [ 7.95669016e-01,  3.37848329e-01,  7.62758643e-01,
         1.05353673e+00],
       [ 6.74501145e-01, -8.19166497e-01,  8.76490051e-01,
         9.22063763e-01],
       [ 1.15917263e+00, -1.24957601e-01,  9.90221459e-01,
         1.18500970e+00],
       [-1.73673948e-01, -1.28197243e+00,  7.05892939e-01,
         1.05353673e+00],
       [-5.25060772e-02, -5.87763531e-01,  7.62758643e-01,
         1.57942861e+00],
       [ 6.74501145e-01,  3.37848329e-01,  8.76490051e-01,
         1.44795564e+00],
       [ 7.95669016e-01, -1.24957601e-01,  9.90221459e-01,
         7.90590793e-01],
       [ 2.24968346e+00,  1.72626612e+00,  1.67260991e+00,
         1.31648267e+00],
       [ 2.24968346e+00, -1.05056946e+00,  1.78634131e+00,
         1.44795564e+00],
       [ 1.89829664e-01, -1.97618132e+00,  7.05892939e-01,
         3.96171883e-01],
       [ 1.28034050e+00,  3.37848329e-01,  1.10395287e+00,
         1.44795564e+00],
       [-2.94841818e-01, -5.87763531e-01,  6.49027235e-01,
         1.05353673e+00],
       [ 2.24968346e+00, -5.87763531e-01,  1.67260991e+00,
         1.05353673e+00],
       [ 5.53333275e-01, -8.19166497e-01,  6.49027235e-01,
         7.90590793e-01],
       [ 1.03800476e+00,  5.69251294e-01,  1.10395287e+00,
         1.18500970e+00],
       [ 1.64384411e+00,  3.37848329e-01,  1.27454998e+00,
         7.90590793e-01],
       [ 4.32165405e-01, -5.87763531e-01,  5.92161531e-01,
         7.90590793e-01],
       [ 3.10997534e-01, -1.24957601e-01,  6.49027235e-01,
         7.90590793e-01],
       [ 6.74501145e-01, -5.87763531e-01,  1.04708716e+00,
         1.18500970e+00],
       [ 1.64384411e+00, -1.24957601e-01,  1.16081857e+00,
         5.27644853e-01],
       [ 1.88617985e+00, -5.87763531e-01,  1.33141568e+00,
         9.22063763e-01],
       [ 2.49201920e+00,  1.72626612e+00,  1.50201279e+00,
         1.05353673e+00],
       [ 6.74501145e-01, -5.87763531e-01,  1.04708716e+00,
         1.31648267e+00],
       [ 5.53333275e-01, -5.87763531e-01,  7.62758643e-01,
         3.96171883e-01],
       [ 3.10997534e-01, -1.05056946e+00,  1.04708716e+00,
         2.64698913e-01],
       [ 2.24968346e+00, -1.24957601e-01,  1.33141568e+00,
         1.44795564e+00],
       [ 5.53333275e-01,  8.00654259e-01,  1.04708716e+00,
         1.57942861e+00],
       [ 6.74501145e-01,  1.06445364e-01,  9.90221459e-01,
         7.90590793e-01],
       [ 1.89829664e-01, -1.24957601e-01,  5.92161531e-01,
         7.90590793e-01],
       [ 1.28034050e+00,  1.06445364e-01,  9.33355755e-01,
         1.18500970e+00],
       [ 1.03800476e+00,  1.06445364e-01,  1.04708716e+00,
         1.57942861e+00],
       [ 1.28034050e+00,  1.06445364e-01,  7.62758643e-01,
         1.44795564e+00],
       [-5.25060772e-02, -8.19166497e-01,  7.62758643e-01,
         9.22063763e-01],
       [ 1.15917263e+00,  3.37848329e-01,  1.21768427e+00,
         1.44795564e+00],
       [ 1.03800476e+00,  5.69251294e-01,  1.10395287e+00,
         1.71090158e+00],
       [ 1.03800476e+00, -1.24957601e-01,  8.19624347e-01,
         1.44795564e+00],
       [ 5.53333275e-01, -1.28197243e+00,  7.05892939e-01,
         9.22063763e-01],
       [ 7.95669016e-01, -1.24957601e-01,  8.19624347e-01,
         1.05353673e+00],
       [ 4.32165405e-01,  8.00654259e-01,  9.33355755e-01,
         1.44795564e+00],
       [ 6.86617933e-02, -1.24957601e-01,  7.62758643e-01,
         7.90590793e-01]])

PCA

Our features are 4-dimensional, using PCA we will reudce it to 2-D

In [15]:
from sklearn.decomposition import PCA
In [16]:
pca = PCA(n_components = 2)
In [17]:
principleComponents = pca.fit_transform(scaled_x)
In [18]:
#converting back to DataFrame
principle_DF = pd.DataFrame(principleComponents, columns=['pc 1', 'pc 2'])
In [20]:
principle_DF.head()
Out[20]:
pc 1 pc 2
0 -2.264542 0.505704
1 -2.086426 -0.655405
2 -2.367950 -0.318477
3 -2.304197 -0.575368
4 -2.388777 0.674767
In [24]:
final_DF = pd.concat([principle_DF, df[['target']]], axis=1)
In [25]:
final_DF.head()
Out[25]:
pc 1 pc 2 target
0 -2.264542 0.505704 Iris-setosa
1 -2.086426 -0.655405 Iris-setosa
2 -2.367950 -0.318477 Iris-setosa
3 -2.304197 -0.575368 Iris-setosa
4 -2.388777 0.674767 Iris-setosa
In [27]:
final_DF.shape
Out[27]:
(150, 3)
In [ ]: